CMAP hosts data on a Microsoft SQL Server. An R user will need the following package to connect to the database: a) “DBI”, for database interface; b) “odbc” for connecting to the database using DBI interface.
But before that, for the Mac operating system, a user needs to install the unixODBC library and database drivers. We suggest using SQL Server ODBC drivers (Free TDS). Using Homebrew, run the following commands to install the suggested module.
In case of Linux operating system, first, install Anaconda distribition, and then run the following commands to install suggested module.
Please follow the link to see other drivers available for installation.
In addition, a user may require some additional R package for downloading, processing and visualizing the data. Run the following commands to install some of the essential packages.
## Package "DBI" provide interface to the database
install.packages("DBI")
## Driver for the database
install.packages("odbc")
## Package for data processing:
install.packages("dbplyr")
install.packages("plyr")
## Package for visualization:
install.packages("ggplot2")
install.packages("plotly")
User can connect to the database using the login credentials (Direct connect) suggested in the examples.
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
dbDisconnect(con)
Attributes of a table includes:
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
## Choose table:
table.name <- "tblsst_AVHRR_OI_NRT"
## Variable name in the table
# tbl.fields <- dbListFields(con,table.name)
# print(tbl.fields)
## collect sample data
tbl.fields <- getDataSample(con,table.name,n=5)
print(tbl.fields)
## lat lon time sst ID
## 1 -89.875 -179.875 2017-12-03 NA 1
## 2 -89.875 -179.625 2017-12-03 NA 2
## 3 -89.875 -179.375 2017-12-03 NA 3
## 4 -89.875 -179.125 2017-12-03 NA 4
## 5 -89.875 -178.875 2017-12-03 NA 5
## Class of each column in the table
tbl.colClass <- getColClass(con,table.name)
print(tbl.colClass)
## Variable Type
## 1 lat numeric
## 2 lon numeric
## 3 time Date
## 4 sst numeric
## 5 ID integer64
## Number of observations
nObs <- getObservationCount(con,table.name)
nObs
## [1] 1760215552
## Space/time information of the table
# tbl.spaceTimeInfo <- getSpaceTimeRange(con,table.name)
# print(tbl.spaceTimeInfo)
## Numeric variable range:
# tbl.rangeNumVar <- getRangeNumVar(con,table.name)
# print(tbl.rangeNumVar)
dbDisconnect(con)
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
## Input: Table name; variable name, space time range information
table.name = 'tblsst_AVHRR_OI_NRT' # table name
sel.var = 'sst' # choose variable
range.var <- list() # Range variable information
range.var$time <- c('2016-04-30', '2016-04-30')
range.var$lat <- c(10,70)
range.var$lon <- c(-180,-80)
## Space/time/number of observations/summary
tbl.subsetSpaceTimeSummary <- getSubsetSpaceTimeRange(con,table.name,range.var)
print(tbl.subsetSpaceTimeSummary)
## # A tibble: 1 x 6
## time_min lat_min lon_min time_max lat_max lon_max
## <date> <dbl> <dbl> <date> <dbl> <dbl>
## 1 2016-04-30 10.1 -180. 2016-04-30 69.9 -80.1
# Summary of the data:
tbl.subsetSummary <- getSubsetRangeNumVar(con, table.name, range.var)
print(tbl.subsetSummary)
## Variable min max sd
## 1 lat 10.125000 69.87500 17.320448
## 2 lon -179.875000 -80.12500 28.867574
## 3 sst -1.800006 31.41999 9.571941
dbDisconnect(con)
To retrieve the data from CMAP, a user need to specify following parameters:
Use getTableData function to download the data as data frame.
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
## Input: Table name; variable name, space time range information
table.name = 'tblsst_AVHRR_OI_NRT' # table name
sel.var = 'sst' # choose variable
range.var <- list() # Range variable [lat,lon,time]
range.var$lat <- c(10,70)
range.var$lon <- c(-180,-80)
range.var$time <- c('2016-04-30', '2016-04-30')
## Subset selection:
tbl.subset <- getTableData(con, table.name, sel.var, range.var)
head(tbl.subset)
## # A tibble: 6 x 4
## lat lon time sst
## <dbl> <dbl> <date> <dbl>
## 1 10.1 -180. 2016-04-30 27.9
## 2 10.1 -180. 2016-04-30 27.9
## 3 10.1 -179. 2016-04-30 27.8
## 4 10.1 -179. 2016-04-30 27.8
## 5 10.1 -179. 2016-04-30 27.8
## 6 10.1 -179. 2016-04-30 27.8
## Ordering of the numeric variable:
# orderby <- c('time','lat','lon') # Specify orderby variable
# tbl.subset <- getTableData(con, table.name, sel.var, range.var,orderby)
# head(tbl.subset)
dbDisconnect(con)
To avoid pulling large dataset from the CMAP server, especially when not required, user may be interested in pulling aggregated data. In addition to the essential input variables, a user need to specify the aggregate variable(agg.var).
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
## Input: Table name; variable name, space time range information
table.name = 'tblsst_AVHRR_OI_NRT' # table name
sel.var = 'sst' # choose variable
range.var <- list() # Range variable [lat,lon,time]
range.var$lat <- c(25,30)
range.var$lon <- c(-160,-155)
range.var$time <- c('2016-03-29', '2016-05-29')
## Aggregate
agg.var <- 'time' # Specify aggregate variable
tbl.subset <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset)
## # A tibble: 6 x 2
## time sst
## <date> <dbl>
## 1 2016-03-29 21.3
## 2 2016-03-30 20.6
## 3 2016-03-31 20.6
## 4 2016-04-01 20.7
## 5 2016-04-02 20.8
## 6 2016-04-03 20.9
dbDisconnect(con)
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
## Setup
# source = './MGL1704.csv'
# table.name = "tblSST_AVHRR_OI_NRT"
# sel.var = "sst"
# latMargin = 0.3
# lonMargin = 0.3
# timeMargin = 1
## For the desired table, query a variable therein
# res = matchSource_onetable(con, source, table.name, sel.var,
# latMargin, lonMargin, timeMargin)
dbDisconnect(con)
Call plot_depth function to obtain plot_ly/ggplot object.
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
#
# Inpit variable:
table.list <- c('tblArgoMerge_REP', 'tblPisces_NRT', 'tblDarwin_Chl_Climatology')
var.list <- c('argo_merge_chl_adj', 'CHL', 'chl01_darwin_clim')
#
selIndex <- 1 # selected argo_merge_chl_adj from tblArgoMerge_REP
table.name <- table.list[selIndex]
sel.var <- var.list[selIndex]
#
range.var <- list()
range.var$lat <- c(20,24)
range.var$lon <- c( -170, -150)
range.var$depth <- c(0, 1500)
range.var$time <- c('2016-04-30', '2016-04-30')
## Subset selection: data retrieval
agg.var <- 'depth'
tbl.subset <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset)
## # A tibble: 6 x 2
## depth argo_merge_chl_adj
## <dbl> <dbl>
## 1 4.10 NA
## 2 4.20 NA
## 3 5.90 NA
## 4 6 NA
## 5 7.20 0.0180
## 6 7.80 NA
## Plot -- Depth profiles:
p <- plot_depth(tbl.subset, 'plotly',sel.var)
p
dbDisconnect(con)
Call plot_regMap function to obtain plot_ly/ggplot object.
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
# Inpit variable:
table.name = 'tblsst_AVHRR_OI_NRT'
sel.var = 'sst'
range.var <- list()
range.var$lat <- c(10,70)
range.var$lon <- c(-180,-80)
range.var$time <- c('2016-04-30', '2016-04-30')
## Data retrieval
tbl.subset <- getTableData(con, table.name, sel.var, range.var, order.var= c('lat','lon'))
head(tbl.subset)
## # A tibble: 6 x 4
## lat lon time sst
## <dbl> <dbl> <date> <dbl>
## 1 10.1 -80.1 2016-04-30 29.6
## 2 10.1 -80.4 2016-04-30 29.8
## 3 10.1 -80.6 2016-04-30 29.9
## 4 10.1 -80.9 2016-04-30 29.9
## 5 10.1 -81.1 2016-04-30 29.8
## 6 10.1 -81.4 2016-04-30 29.8
## Plot - regional map
# out <- plot_regMap(con, table.name,sel.var,range.var, type = 'ggplot')
out <- plot_regMap(con, table.name,sel.var,range.var, type = 'plotly')
out$plot
dbDisconnect(con)
Call plot_ts function to obtain plot_ly/ggplot object.
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
# Input variable:
table.list <- c('tblSST_AVHRR_OI_NRT', 'tblAltimetry_REP', 'tblPisces_NRT')
var.list <- c('sst', 'sla', 'NO3')
selIndex <- 1 # selected "sst" from the table "tblSST_AVHRR_OI_NRT"
table.name <- table.list[selIndex]
sel.var <- var.list[selIndex]
## Example I:
range.var <- list()
range.var$lat <- c(25,30)
range.var$lon <- c(-160,-155)
range.var$time <- c('2016-03-29', '2016-05-29')
## Subset selection: data retrieval
agg.var <- 'time'
tbl.subset <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset,20)
## # A tibble: 20 x 2
## time sst
## <date> <dbl>
## 1 2016-03-29 21.3
## 2 2016-03-30 20.6
## 3 2016-03-31 20.6
## 4 2016-04-01 20.7
## 5 2016-04-02 20.8
## 6 2016-04-03 20.9
## 7 2016-04-04 21.1
## 8 2016-04-05 20.9
## 9 2016-04-06 20.9
## 10 2016-04-07 20.7
## 11 2016-04-08 20.9
## 12 2016-04-09 21.0
## 13 2016-04-10 21.2
## 14 2016-04-11 21.1
## 15 2016-04-12 21.1
## 16 2016-04-13 21.1
## 17 2016-04-14 21.2
## 18 2016-04-15 21.4
## 19 2016-04-16 21.4
## 20 2016-04-17 21.4
## Plot -- Time series:
p <- plot_ts(tbl.subset,'plotly',sel.var)
p
dbDisconnect(con)
plot_xy function download the data, and output a list which contain a) plot object: plot_ly/ggplot; b) corresponding data tables.
library(DBI)
library(odbc)
library(dplyr)
library(cmap4r)
## connect to database
con <- DBI::dbConnect(odbc(),
Driver = "libtdsodbc.so", ## Free TDS driver used
Server = "128.208.239.15", ## IP address of the server
Database = "Opedia", ## Database name
UID = "ArmLab", ## User ID
PWD = "ArmLab2018", ## Password
Port = 1433) ## Port number
# Inpit variable:
#
table.list <- c('tblSST_AVHRR_OI_NRT', 'tblAltimetry_REP')
var.list <- c('sst', 'sla')
#
range.var <- list()
range.var$lat <- c(25,30)
range.var$lon <- c(-160, -155)
range.var$time <- c('2016-03-29', '2016-05-29')
#
agg.var <- 'time'
## -----------------------------------
## Dataset from table II
selIndex <- 1
table.name <- table.list[selIndex] # Specify table name I
sel.var <- var.list[selIndex] # Variable from table name I
tbl.subset.x <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset.x)
## # A tibble: 6 x 2
## time sst
## <date> <dbl>
## 1 2016-03-29 21.3
## 2 2016-03-30 20.6
## 3 2016-03-31 20.6
## 4 2016-04-01 20.7
## 5 2016-04-02 20.8
## 6 2016-04-03 20.9
## Dataset from table II
selIndex <- 2
table.name <- table.list[selIndex] # Specify table name II
sel.var <- var.list[selIndex] # Variable from table name II
tbl.subset.y <- getAggregatedTableData(con, table.name, sel.var, range.var, agg.var)
head(tbl.subset.y)
## # A tibble: 6 x 2
## time sla
## <date> <dbl>
## 1 2016-03-29 0.0242
## 2 2016-03-30 0.0228
## 3 2016-03-31 0.0215
## 4 2016-04-01 0.0208
## 5 2016-04-02 0.0197
## 6 2016-04-03 0.0189
## Plot - XY
# out <- plot_xy(con, table.list,var.list,range.var,agg.var,type = 'ggplot')
out <- plot_xy(con, table.list,var.list,range.var,agg.var,type = 'plotly')
out$plot
dbDisconnect(con)